from lxml import  etree
import  pymysql
import  requests
import  time


class DB:
    """Context manager that opens a MySQL connection and hands out a cursor.

    The connection and its cursor are created in ``__init__``. Entering the
    ``with`` block returns the cursor. Leaving it commits when the block
    finished normally, rolls back when it raised, and closes the cursor and
    the connection in either case.

    Args:
        host: MySQL server host name.
        port: MySQL server port.
        user: Login user name.
        password: Login password.
        db: Name of the database to select (passed as ``database``).
        charset: Connection character set.
    """

    def __init__(self, host='', port=3306, user='', password='', db='', charset='utf8'):
        self.conn = pymysql.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=db,
            charset=charset,
        )
        self.cur = self.conn.cursor()

    def __enter__(self):
        """Return the cursor; runs on entry to the ``with`` block."""
        return self.cur

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Finish the transaction and release the cursor and connection.

        Returns ``None``, so an exception raised inside the ``with`` block
        keeps propagating to the caller.
        """
        try:
            if exc_type is None:
                self.conn.commit()
            else:
                # The block failed part-way: do not persist a partial batch.
                self.conn.rollback()
        finally:
            # Close both handles even if commit/rollback itself raised.
            try:
                self.cur.close()
            finally:
                self.conn.close()
def dataurl(url, timeout=10):
    """Download *url* and return the page parsed as an lxml HTML tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The result of ``etree.HTML`` applied to the response text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4242.0 Safari/537.36'}
    res = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the list.
    res.raise_for_status()
    # Force UTF-8 decoding of the body rather than relying on detection.
    res.encoding = 'utf-8'
    return etree.HTML(res.text)
def dataHTML(html, db):
    """Extract every post row from a parsed list page and insert it into ``guba``.

    Each ``<li>`` under ``<ul class="newlist">`` is turned into one row of
    six strings (author, update time, read count, title, forum name,
    comment count). The row is printed and then inserted through *db*.

    Args:
        html: Parsed page exposing ``.xpath`` (an lxml element).
        db: Cursor-like object; ``execute(sql, params)`` is called once per
            extracted row.
    """
    rows = html.xpath('//ul[@class="newlist"]//li')  # every list entry

    for row in rows:
        links = row.xpath('./span/a/text()')
        if not links:
            # No forum/title links: not a post row. Indexing would raise.
            continue

        # normalize-space() yields a plain string ('' when the node is
        # missing), so the author is bound as text rather than as a list.
        author = row.xpath('normalize-space(./cite[@class="aut"]/a/font/text())')
        updated = row.xpath('normalize-space(./cite[@class="last"]/text())')  # update time
        reads = row.xpath('normalize-space(./cite/text())')  # read count (first <cite> text)
        # NOTE(review): assumes the second <cite> of the row holds the comment
        # count - confirm against the live page markup. The previous code
        # stored only the first character of the read count here.
        comments = row.xpath('normalize-space(./cite[2]/text())')
        title = links[-1]  # last link text is the post title
        forum = links[0]  # first link text is the forum name

        para = [author, updated, reads, title, forum, comments]
        print(para)
        db.execute(
            'insert into guba (author,update_,read_,title,name_,comment_) values (%s,%s,%s,%s,%s,%s)',
            para,
        )
def main(db, first_page=1, last_page=4, delay=1):
    """Scrape a range of Guba list pages and store their rows through *db*.

    Args:
        db: Cursor-like object forwarded to ``dataHTML`` for the inserts.
        first_page: Number of the first list page to fetch.
        last_page: Number of the last list page to fetch (inclusive).
        delay: Seconds to sleep after each page, to throttle requests.
    """
    for page in range(first_page, last_page + 1):
        url = 'http://guba.eastmoney.com/default,0_{}.html'.format(page)
        tree = dataurl(url)
        dataHTML(tree, db)
        time.sleep(delay)


if __name__ == '__main__':
    # Script entry point: open the database, run the scraper, and let the
    # DB context manager finish the transaction and close the connection.
    # The connection settings below are hard-coded.
    db_settings = dict(
        host='localhost',
        port=3306,
        user='usert',
        password='123456',
        db='guojiuchang',
    )
    with DB(**db_settings) as db:
        main(db)


